/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    M5Base.java
 *    Copyright (C) 2000-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.trees.m5;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.supervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.RemoveUseless;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

/**
 * M5Base. Implements base routines for generating M5 Model trees and rules.
 * <p>
 * 
 * The original algorithm M5 was invented by Quinlan: <br/>
 * 
 * Quinlan J. R. (1992). Learning with continuous classes. Proceedings of the
 * Australian Joint Conference on Artificial Intelligence. 343--348. World
 * Scientific, Singapore.
 * <p/>
 * 
 * Yong Wang made improvements and created M5': <br/>
 * 
 * Wang, Y and Witten, I. H. (1997). Induction of model trees for predicting
 * continuous classes. Proceedings of the poster papers of the European
 * Conference on Machine Learning. University of Economics, Faculty of
 * Informatics and Statistics, Prague.
 * <p/>
 * 
 * Valid options are:
 * <p>
 * 
 * -U <br>
 * Use unsmoothed predictions.
 * <p>
 * 
 * -R <br>
 * Build regression tree/rule rather than model tree/rule
 * 
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision$
 */
public abstract class M5Base extends AbstractClassifier implements AdditionalMeasureProducer, TechnicalInformationHandler {

    /** for serialization */
    private static final long serialVersionUID = -4022221950191647679L;

    /**
     * the instances covered by the tree/rules
     */
    private Instances m_instances;

    /**
     * the rule set
     */
    protected ArrayList<Rule> m_ruleSet;

    /**
     * generate a decision list instead of a single tree.
     */
    private boolean m_generateRules;

    /**
     * use unsmoothed predictions
     */
    private boolean m_unsmoothedPredictions;

    /**
     * filter to fill in missing values
     */
    private ReplaceMissingValues m_replaceMissing;

    /**
     * filter to convert nominal attributes to binary
     */
    private NominalToBinary m_nominalToBinary;

    /**
     * for removing useless attributes
     */
    private RemoveUseless m_removeUseless;

    /**
     * Save instances at each node in an M5 tree for visualization purposes.
     */
    protected boolean m_saveInstances = false;

    /**
     * Make a regression tree/rule instead of a model tree/rule
     */
    protected boolean m_regressionTree;

    /**
     * Do not prune tree/rules
     */
    protected boolean m_useUnpruned = false;

    /**
     * The minimum number of instances to allow at a leaf node
     */
    protected double m_minNumInstances = 4;

    /**
     * Constructor
     */
    public M5Base() {
        m_generateRules = false;
        m_unsmoothedPredictions = false;
        m_useUnpruned = false;
        m_minNumInstances = 4;
    }

    /**
     * Returns an instance of a TechnicalInformation object, containing detailed
     * information about the technical background of this class, e.g., paper
     * reference or book this class is based on.
     * 
     * @return the technical information about this class
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result;
        TechnicalInformation additional;

        result = new TechnicalInformation(Type.INPROCEEDINGS);
        result.setValue(Field.AUTHOR, "Ross J. Quinlan");
        result.setValue(Field.TITLE, "Learning with Continuous Classes");
        result.setValue(Field.BOOKTITLE, "5th Australian Joint Conference on Artificial Intelligence");
        result.setValue(Field.YEAR, "1992");
        result.setValue(Field.PAGES, "343-348");
        result.setValue(Field.PUBLISHER, "World Scientific");
        result.setValue(Field.ADDRESS, "Singapore");

        additional = result.add(Type.INPROCEEDINGS);
        additional.setValue(Field.AUTHOR, "Y. Wang and I. H. Witten");
        additional.setValue(Field.TITLE, "Induction of model trees for predicting continuous classes");
        additional.setValue(Field.BOOKTITLE, "Poster papers of the 9th European Conference on Machine Learning");
        additional.setValue(Field.YEAR, "1997");
        additional.setValue(Field.PUBLISHER, "Springer");

        return result;
    }

    /**
     * Returns an enumeration describing the available options
     * 
     * @return an enumeration of all the available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> newVector = new Vector<Option>(4);

        newVector.add(new Option("\tUse unpruned tree/rules", "N", 0, "-N"));

        newVector.add(new Option("\tUse unsmoothed predictions", "U", 0, "-U"));

        newVector.add(new Option("\tBuild regression tree/rule rather " + "than a model tree/rule", "R", 0, "-R"));

        newVector.add(new Option("\tSet minimum number of instances " + "per leaf\n\t(default 4)", "M", 1, "-M <minimum number of instances>"));

        newVector.addAll(Collections.list(super.listOptions()));

        return newVector.elements();
    }

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * Valid options are:
     * <p>
     * 
     * -U <br>
     * Use unsmoothed predictions.
     * <p>
     * 
     * -R <br>
     * Build a regression tree rather than a model tree.
     * <p>
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        setUnpruned(Utils.getFlag('N', options));
        setUseUnsmoothed(Utils.getFlag('U', options));
        setBuildRegressionTree(Utils.getFlag('R', options));
        String optionString = Utils.getOption('M', options);
        if (optionString.length() != 0) {
            setMinNumInstances((new Double(optionString)).doubleValue());
        }
        super.setOptions(options);
        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the classifier.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {

        Vector<String> result = new Vector<String>();

        if (getUnpruned()) {
            result.add("-N");
        }

        if (getUseUnsmoothed()) {
            result.add("-U");
        }

        if (getBuildRegressionTree()) {
            result.add("-R");
        }

        result.add("-M");
        result.add("" + getMinNumInstances());

        Collections.addAll(result, super.getOptions());

        return result.toArray(new String[result.size()]);
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String unprunedTipText() {
        return "Whether unpruned tree/rules are to be generated.";
    }

    /**
     * Use unpruned tree/rules
     * 
     * @param unpruned true if unpruned tree/rules are to be generated
     */
    public void setUnpruned(boolean unpruned) {
        m_useUnpruned = unpruned;
    }

    /**
     * Get whether unpruned tree/rules are being generated
     * 
     * @return true if unpruned tree/rules are to be generated
     */
    public boolean getUnpruned() {
        return m_useUnpruned;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String generateRulesTipText() {
        return "Whether to generate rules (decision list) rather than a tree.";
    }

    /**
     * Generate rules (decision list) rather than a tree
     * 
     * @param u true if rules are to be generated
     */
    protected void setGenerateRules(boolean u) {
        m_generateRules = u;
    }

    /**
     * get whether rules are being generated rather than a tree
     * 
     * @return true if rules are to be generated
     */
    protected boolean getGenerateRules() {
        return m_generateRules;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String useUnsmoothedTipText() {
        return "Whether to use unsmoothed predictions.";
    }

    /**
     * Use unsmoothed predictions
     * 
     * @param s true if unsmoothed predictions are to be used
     */
    public void setUseUnsmoothed(boolean s) {
        m_unsmoothedPredictions = s;
    }

    /**
     * Get whether or not smoothing is being used
     * 
     * @return true if unsmoothed predictions are to be used
     */
    public boolean getUseUnsmoothed() {
        return m_unsmoothedPredictions;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String buildRegressionTreeTipText() {
        return "Whether to generate a regression tree/rule instead of a model tree/rule.";
    }

    /**
     * Get the value of regressionTree.
     * 
     * @return Value of regressionTree.
     */
    public boolean getBuildRegressionTree() {

        return m_regressionTree;
    }

    /**
     * Set the value of regressionTree.
     * 
     * @param newregressionTree Value to assign to regressionTree.
     */
    public void setBuildRegressionTree(boolean newregressionTree) {

        m_regressionTree = newregressionTree;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String minNumInstancesTipText() {
        return "The minimum number of instances to allow at a leaf node.";
    }

    /**
     * Set the minimum number of instances to allow at a leaf node
     * 
     * @param minNum the minimum number of instances
     */
    public void setMinNumInstances(double minNum) {
        m_minNumInstances = minNum;
    }

    /**
     * Get the minimum number of instances to allow at a leaf node
     * 
     * @return a <code>double</code> value
     */
    public double getMinNumInstances() {
        return m_minNumInstances;
    }

    /**
     * Returns default capabilities of the classifier, i.e., of LinearRegression.
     * 
     * @return the capabilities of this classifier
     */
    @Override
    public Capabilities getCapabilities() {

        Capabilities result = super.getCapabilities();
        result.disableAll();

        // attributes
        result.enable(Capabilities.Capability.NOMINAL_ATTRIBUTES);
        result.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        result.enable(Capabilities.Capability.DATE_ATTRIBUTES);
        result.enable(Capabilities.Capability.MISSING_VALUES);

        // class
        result.enable(Capabilities.Capability.NUMERIC_CLASS);
        result.enable(Capabilities.Capability.DATE_CLASS);
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);

        return result;
    }

    /**
     * Generates the classifier.
     * 
     * @param data set of instances serving as training data
     * @throws Exception if the classifier has not been generated successfully
     */
    @Override
    public void buildClassifier(Instances data) throws Exception {
        // can classifier handle the data?
        getCapabilities().testWithFail(data);

        // remove instances with missing class
        data = new Instances(data);
        data.deleteWithMissingClass();

        m_instances = new Instances(data);

        m_replaceMissing = new ReplaceMissingValues();
        m_replaceMissing.setInputFormat(m_instances);
        m_instances = Filter.useFilter(m_instances, m_replaceMissing);

        m_nominalToBinary = new NominalToBinary();
        m_nominalToBinary.setInputFormat(m_instances);
        m_instances = Filter.useFilter(m_instances, m_nominalToBinary);

        m_removeUseless = new RemoveUseless();
        m_removeUseless.setInputFormat(m_instances);
        m_instances = Filter.useFilter(m_instances, m_removeUseless);

        m_instances.randomize(new Random(1));

        m_ruleSet = new ArrayList<Rule>();

        Rule tempRule;

        if (m_generateRules) {
            Instances tempInst = m_instances;

            do {
                tempRule = new Rule();
                tempRule.setSmoothing(!m_unsmoothedPredictions);
                tempRule.setRegressionTree(m_regressionTree);
                tempRule.setUnpruned(m_useUnpruned);
                tempRule.setSaveInstances(false);
                tempRule.setMinNumInstances(m_minNumInstances);
                tempRule.buildClassifier(tempInst);
                m_ruleSet.add(tempRule);
                // System.err.println("Built rule : "+tempRule.toString());
                tempInst = tempRule.notCoveredInstances();
                tempRule.freeNotCoveredInstances();
            } while (tempInst.numInstances() > 0);
        } else {
            // just build a single tree
            tempRule = new Rule();

            tempRule.setUseTree(true);
            // tempRule.setGrowFullTree(true);
            tempRule.setSmoothing(!m_unsmoothedPredictions);
            tempRule.setSaveInstances(m_saveInstances);
            tempRule.setRegressionTree(m_regressionTree);
            tempRule.setUnpruned(m_useUnpruned);
            tempRule.setMinNumInstances(m_minNumInstances);

            Instances temp_train;

            temp_train = m_instances;

            tempRule.buildClassifier(temp_train);

            m_ruleSet.add(tempRule);

            // System.err.print(tempRule.m_topOfTree.treeToString(0));
        }

        // save space
        m_instances = new Instances(m_instances, 0);
    }

    /**
     * Calculates a prediction for an instance using a set of rules or an M5 model
     * tree
     * 
     * @param inst the instance whos class value is to be predicted
     * @return the prediction
     * @throws Exception if a prediction can't be made.
     */
    @Override
    public double classifyInstance(Instance inst) throws Exception {
        Rule temp;
        double prediction = 0;
        boolean success = false;

        m_replaceMissing.input(inst);
        inst = m_replaceMissing.output();
        m_nominalToBinary.input(inst);
        inst = m_nominalToBinary.output();
        m_removeUseless.input(inst);
        inst = m_removeUseless.output();

        if (m_ruleSet == null) {
            throw new Exception("Classifier has not been built yet!");
        }

        if (!m_generateRules) {
            temp = m_ruleSet.get(0);
            return temp.classifyInstance(inst);
        }

        boolean cont;
        int i;

        for (i = 0; i < m_ruleSet.size(); i++) {
            cont = false;
            temp = m_ruleSet.get(i);

            try {
                prediction = temp.classifyInstance(inst);
                success = true;
            } catch (Exception e) {
                cont = true;
            }

            if (!cont) {
                break;
            }
        }

        if (!success) {
            System.out.println("Error in predicting (DecList)");
        }
        return prediction;
    }

    /**
     * Returns a description of the classifier
     * 
     * @return a description of the classifier as a String
     */
    @Override
    public String toString() {
        StringBuffer text = new StringBuffer();
        Rule temp;

        if (m_ruleSet == null) {
            return "Classifier hasn't been built yet!";
        }

        if (m_generateRules) {
            text.append("M5 " + ((m_useUnpruned == true) ? "unpruned " : "pruned ") + ((m_regressionTree == true) ? "regression " : "model ") + "rules ");

            if (!m_unsmoothedPredictions) {
                text.append("\n(using smoothed linear models) ");
            }

            text.append(":\n");

            text.append("Number of Rules : " + m_ruleSet.size() + "\n\n");

            for (int j = 0; j < m_ruleSet.size(); j++) {
                temp = m_ruleSet.get(j);

                text.append("Rule: " + (j + 1) + "\n");
                text.append(temp.toString());
            }
        } else {
            temp = m_ruleSet.get(0);
            text.append(temp.toString());
        }
        return text.toString();
    }

    /**
     * Returns an enumeration of the additional measure names
     * 
     * @return an enumeration of the measure names
     */
    @Override
    public Enumeration<String> enumerateMeasures() {
        Vector<String> newVector = new Vector<String>(1);
        newVector.add("measureNumRules");
        return newVector.elements();
    }

    /**
     * Returns the value of the named measure
     * 
     * @param additionalMeasureName the name of the measure to query for its value
     * @return the value of the named measure
     * @throws Exception if the named measure is not supported
     */
    @Override
    public double getMeasure(String additionalMeasureName) {
        if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0) {
            return measureNumRules();
        } else {
            throw new IllegalArgumentException(additionalMeasureName + " not supported (M5)");
        }
    }

    /**
     * return the number of rules
     * 
     * @return the number of rules (same as # linear models & # leaves in the tree)
     */
    public double measureNumRules() {
        if (m_generateRules) {
            return m_ruleSet.size();
        }
        return m_ruleSet.get(0).m_topOfTree.numberOfLinearModels();
    }

    public RuleNode getM5RootNode() {
        Rule temp = m_ruleSet.get(0);
        return temp.getM5RootNode();
    }
}
